// Copyright (C) 2022 Check Point Software Technologies Ltd. All rights reserved.

// Licensed under the Apache License, Version 2.0 (the "License");
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "ParserPairs.h"
#include "Waf2Util.h"
#include "debug.h"

USE_DEBUG_FLAG(D_WAAP_PARSER_PAIRS);
USE_DEBUG_FLAG(D_WAAP);

const std::string ParserPairs::m_parserName = "ParserPairs";

ParserPairs::ParserPairs(
    IParserStreamReceiver &receiver,
    size_t parser_depth,
    char separatorChar,
    bool should_decode_per,
    bool should_decode_plus_sign
) :
    m_receiver(receiver),
    m_state(s_start),
    m_escapedLen(0),
    m_separatorChar(separatorChar),
    m_escapedCharCandidate(0),
    should_decode_percent(should_decode_per),
    should_decode_plus(should_decode_plus_sign),
    m_parser_depth(parser_depth),
    m_bracket_counter(0)
{
    dbgTrace(D_WAAP)
        << "should_decode_percent="
        << should_decode_per
        << "parser_depth="
        << parser_depth;

    // TODO:: is there a need for this?
    memset(m_escaped, 0, sizeof(m_escaped));
}

ParserPairs::~ParserPairs()
{}

size_t
ParserPairs::push(const char *buf, size_t len)
{
    size_t i = 0;
    size_t mark = 0;
    char c;
    int is_last = 0;

    dbgTrace(D_WAAP) << "ParserPairs::push(): starting (len=" << len << ")";

    if (len == 0) {
        dbgTrace(D_WAAP) << "ParserPairs::push(): end of data signal! m_state=" << m_state;
        // flush unescaped data collected (if any)
        if (m_escapedLen > 0) {
            if (m_state == s_key_start) {
                if (m_receiver.onKey(m_escaped, m_escapedLen) != 0) {
                    m_state = s_error;
                    return i;
                }
            } else if (m_state == s_value_start) {
                if (m_receiver.onValue(m_escaped, m_escapedLen) != 0) {
                    m_state = s_error;
                    return i;
                }
            }
            m_escapedLen = 0;
        }

        if (m_receiver.onKvDone() != 0) {
            m_state = s_error;
            return i;
        }

        return 0;
    }

    while (i < len) {
        c = buf[i];
        is_last = (i == (len - 1));

        // Checking valid char urlencode
        if (c < 32) {
            // Bitwise operation checking since `char` can be either signed on unsigned depending on arch
            if (!isspace(c) && ((c & 0x80) == 0x00)) {
                dbgDebug(D_WAAP_PARSER_PAIRS)
                    << "invalid URL encoding character: "
                    << c
                    << " decimal value is "
                    << c + 0;
                m_state = s_error;
                return i;
            }
        }

        dbgTrace(D_WAAP_PARSER_PAIRS)
            << "ParserPairs::push(): state="
            << m_state
            << "; ch='"
            << c
            << "' m_bracket_counter = "
            << m_bracket_counter;

        switch (m_state) {
            case s_start: {
                dbgTrace(D_WAAP_PARSER_PAIRS) << "ParserPairs::push(): s_start";
                // m_state = s_key_start;

                // fallthrough //
                CP_FALL_THROUGH;
            }
            case s_key_start: {
                dbgTrace(D_WAAP_PARSER_PAIRS) << "ParserPairs::push(): s_key_start";
                mark = i;
                m_state = s_key;

                // fallthrough //
                CP_FALL_THROUGH;
            }
            case s_key: {
                dbgTrace(D_WAAP_PARSER_PAIRS) << "ParserPairs::push(): s_key";

                // skip leading spaces in the key
                if (isspace(c)) {
                    m_state = s_key_start; // skip the space character without including it in the output
                    break;
                }

                if (c == '%' && should_decode_percent) {
                    if (i - mark > 0) {
                        if (m_receiver.onKey(buf + mark, i - mark) != 0) {
                            m_state = s_error;
                            return i;
                        }
                    }
                    m_state = s_key_escaped1;
                    break;
                } else if (c == '+' && should_decode_plus) {
                    // convert plus character to space
                    if (i - mark > 0) {
                        if (m_receiver.onKey(buf + mark, i - mark) != 0) {
                            m_state = s_error;
                            return i;
                        }
                        mark = i;
                    }
                    m_escaped[m_escapedLen] = ' ';
                    m_escapedLen++;
                    if (m_escapedLen >= MAX_PAIRS_ESCAPED_SIZE) {
                        if (m_receiver.onKey(m_escaped, m_escapedLen) != 0) {
                            m_state = s_error;
                            return i;
                        }
                        m_escapedLen = 0;
                    }
                    m_state = s_key_start;
                    break;
                } else {
                    // flush unescaped data collected (if any)
                    if (m_escapedLen > 0) {
                        if (m_receiver.onKey(m_escaped, m_escapedLen) != 0) {
                            m_state = s_error;
                            return i;
                        }
                        m_escapedLen = 0;
                        mark = i;
                    }
                }
                if (c == m_separatorChar) {
                    // this happens when there is a key without value. Example: ?p&a=b&k&%61&blah
                    // in this case we emit the key, but not the value, and send onKvDone to cause
                    // the receiver to process the pair: key will be provided with no value.
                    if (m_receiver.onKey(buf + mark, i - mark) != 0) {
                        m_state = s_error;
                        return i;
                    }
                    if (m_receiver.onKvDone() != 0) {
                        m_state = s_error;
                        return i;
                    }
                    m_state = s_key_start;
                    break;
                }
                if (c == '=') {
                    if (m_receiver.onKey(buf + mark, i - mark) != 0) {
                        m_state = s_error;
                        return i;
                    }
                    m_state = s_value_start;
                    break;
                }
                if (is_last) {
                    if (m_receiver.onKey(buf + mark, (i - mark) + 1) != 0) {
                        m_state = s_error;
                        return i;
                    }
                }
                break;
            }
            case s_key_escaped1: {
                dbgTrace(D_WAAP_PARSER_PAIRS) << "ParserPairs::push(): s_key_escaped1";
                bool valid;
                unsigned char v = from_hex(c, valid);
                if (!valid) { // character right after the '%' is not a valid hex char.
                    // dump escaped chars
                    if (m_escapedLen > 0 && m_receiver.onKey(m_escaped, m_escapedLen) != 0) {
                        m_state = s_error;
                        return i;
                    }
                    m_escapedLen = 0;
                    // return the '%' character back to the output.
                    if (m_receiver.onKey("%", 1) != 0) {
                        return i;
                    }

                    // If the character is '%' - stay in the same state (correctly treat '%%%%hhh' sequences
                    if (c != '%') {
                        // pass the non-hex character back to the output too.
                        if (m_receiver.onKey(&c, 1) != 0) {
                            return i;
                        }

                        // otherwise (the character is not '%'), switch back to the s_key state
                        m_state = s_key_start;
                    }
                    break;
                }

                m_escapedCharCandidate = c;
                m_escaped[m_escapedLen] = v << 4;
                m_state = s_key_escaped2;
                break;
            }
            case s_key_escaped2: {
                dbgTrace(D_WAAP_PARSER_PAIRS) << "ParserPairs::push(): s_key_escaped2";
                bool valid;
                unsigned char v = from_hex(c, valid);
                if (!valid) {
                    // This situation (2nd character is not valid hex) is not treated right now.
                    // In this case, v will be equal to 0 and output character will be invalid one.

                    // dump escaped chars
                    if (m_escapedLen > 0 && m_receiver.onKey(m_escaped, m_escapedLen) != 0) {
                        m_state = s_error;
                        return i;
                    }
                    m_escapedLen = 0;

                    // return the '%' character back to the output.
                    if (m_receiver.onKey("%", 1) != 0) {
                        return i;
                    }
                    // add the character that was thought to be escaped value
                    if (m_receiver.onKey(&m_escapedCharCandidate, 1)) {
                        return i;
                    }

                    // re parse the character as a key (i is incremented back to current value)
                    i--;
                    m_state = s_key_start;
                    break;
                }
                m_escapedCharCandidate = 0;
                m_escaped[m_escapedLen] |= v;
                m_escapedLen++;
                if (m_escapedLen >= MAX_PAIRS_ESCAPED_SIZE) {
                    if (m_receiver.onKey(m_escaped, m_escapedLen) != 0) {
                        m_state = s_error;
                        return i;
                    }
                    m_escapedLen = 0;
                }
                m_state = s_key_start;
                break;
            }
            case s_value_start: {
                dbgTrace(D_WAAP_PARSER_PAIRS) << "ParserPairs::push(): s_value_start";
                mark = i;
                m_state = s_value;

                // fallthrough //
                CP_FALL_THROUGH;
            }
            case s_value: {
                dbgTrace(D_WAAP_PARSER_PAIRS) << "ParserPairs::push(): s_value";
                if (c == '%' && should_decode_percent) {
                    if (i - mark > 0) {
                        if (m_receiver.onValue(buf + mark, i - mark) != 0) {
                            m_state = s_error;
                            return i;
                        }
                    }
                    m_state = s_value_escaped1;
                    break;
                } else if (c == '+' && should_decode_plus) {
                    // convert plus character to space
                    if (i - mark > 0) {
                        if (m_receiver.onValue(buf + mark, i - mark) != 0) {
                            m_state = s_error;
                            return i;
                        }
                    }
                    m_escaped[m_escapedLen] = ' ';
                    m_escapedLen++;
                    if (m_escapedLen >= MAX_PAIRS_ESCAPED_SIZE) {
                        if (m_receiver.onValue(m_escaped, m_escapedLen) != 0) {
                            m_state = s_error;
                            return i;
                        }
                        m_escapedLen = 0;
                    }
                    m_state = s_value_start;
                    break;
                } else {
                    // flush unescaped data collected (if any)
                    if (c == '{' || c == '[') m_bracket_counter++;
                    if (c == '}' || c == ']') m_bracket_counter--;
                    if (m_escapedLen > 0) {
                        if (m_receiver.onValue(m_escaped, m_escapedLen) != 0) {
                            m_state = s_error;
                            return i;
                        }
                        m_escapedLen = 0;
                        mark = i;
                    }
                }
                if (c == m_separatorChar) {
                    if (m_bracket_counter) {
                        if (m_escapedLen > 0) {
                            if (m_receiver.onValue(m_escaped, m_escapedLen) != 0) {
                                m_state = s_error;
                                return i;
                            }
                            m_escapedLen = 0;
                            mark = i;
                        }
                    } else {
                        if (m_receiver.onValue(buf + mark, i - mark) != 0) {
                            dbgWarning(D_WAAP_PARSER_PAIRS) << "ParserPairs::push() s_value : failed on value";
                            m_state = s_error;
                            return i;
                        }
                        if (m_receiver.onKvDone() != 0) {
                            dbgWarning(D_WAAP_PARSER_PAIRS) << "ParserPairs::push() : s_value :  failed on KV";
                            m_state = s_error;
                            return i;
                        }
                        m_state = s_key_start;
                        break;
                    }
                }
                if (is_last) {
                    if (m_receiver.onValue(buf + mark, (i - mark) + 1) != 0) {
                        m_state = s_error;
                        return i;
                    }
                }
                break;
            }
            case s_value_escaped1: {
                dbgTrace(D_WAAP_PARSER_PAIRS) << "ParserPairs::push(): s_value_escaped1";
                bool valid;
                unsigned char v = from_hex(c, valid);
                if (!valid) { // character right after the '%' is not a valid hex char.
                    // dump escaped chars
                    if (m_escapedLen > 0 && m_receiver.onValue(m_escaped, m_escapedLen) != 0) {
                        m_state = s_error;
                        return i;
                    }
                    m_escapedLen = 0;
                    // return the '%' character back to the output.
                    if (m_receiver.onValue("%", 1) != 0) {
                        return i;
                    }

                    // If the character is '%' - stay in the same state (correctly treat '%%%%hhh' sequences)
                    if (c != '%') {
                        // pass the non-hex character back to the output too.
                        if (m_receiver.onValue(&c, 1) != 0) {
                            return i;
                        }

                        // otherwise (the character is not '%'), switch back to the s_value state
                        m_state = s_value_start;
                    }
                    break;
                }
                m_escapedCharCandidate = c;
                m_escaped[m_escapedLen] = v << 4;
                m_state = s_value_escaped2;
                break;
            }
            case s_value_escaped2: {
                dbgTrace(D_WAAP_PARSER_PAIRS) << "ParserPairs::push(): s_value_escaped2";
                bool valid;
                unsigned char v = from_hex(c, valid);
                if (!valid) {
                    // This situation (2nd character is not valid hex) is not treated right now.
                    // In this case, v will be equal to 0 and output character will be invalid one.

                    // dump escaped chars
                    if (m_escapedLen > 0 && m_receiver.onValue(m_escaped, m_escapedLen) != 0) {
                        m_state = s_error;
                        return i;
                    }
                    m_escapedLen = 0;

                    // return the '%' character back to the output.
                    if (m_receiver.onValue("%", 1) != 0) {
                        return i;
                    }
                    // add the character that was thought to be escaped value
                    if (m_receiver.onValue(&m_escapedCharCandidate, 1)) {
                        return i;
                    }

                    // re parse the character as a key (i is incremented back to current value)
                    i--;
                    m_state = s_value_start;
                    break;
                }
                m_escapedCharCandidate = 0;
                m_escaped[m_escapedLen] |= v;
                m_escapedLen++;
                if (m_escapedLen >= MAX_PAIRS_ESCAPED_SIZE) {
                    if (m_receiver.onValue(m_escaped, m_escapedLen) != 0) {
                        m_state = s_error;
                        return i;
                    }
                    m_escapedLen = 0;
                }
                m_state = s_value_start;
                break;
            }
            case s_error: {
                dbgTrace(D_WAAP_PARSER_PAIRS) << "ParserPairs::push(): s_error";
                return 0;
            }
            default: {
                dbgTrace(D_WAAP_PARSER_PAIRS) << "ParserPairs::push(): URL parser unrecoverable error";
                m_state = s_error;
                return 0;
            }
        } // end of switch()
        ++i;
    }

    dbgTrace(D_WAAP_PARSER_PAIRS) << "ParserPairs::push(): finished: len=" << len;
    return len;
}

void
ParserPairs::finish()
{
    push(NULL, 0);
}

const std::string &
ParserPairs::name() const
{
    return m_parserName;
}

bool
ParserPairs::error() const
{
    return m_state == s_error;
}
